package us.codecraft.webmagic.samples;

import com.taobao.api.domain.NTbkItem;
import com.taobao.api.response.TbkItemCouponGetResponse;
import com.taobao.api.response.TbkItemGetResponse;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.utils.APIUtils;

import java.text.DecimalFormat;
import java.util.List;

/**
 * Crawls the Taobao home page for its first-level category names and then, for each
 * category, queries the Taobao API through {@link APIUtils} to report what share of the
 * sampled items has a final price at or above {@code PRICE_THRESHOLD}.
 *
 * <p>All output is written to standard output.
 *
 * @author Luo Wenlong (骆文龙)
 * @version v1.0, created 2017-09-28
 */
public class TaoBaoProcessor implements PageProcessor {
    /** Page size assumed when turning the reported total into a page count. */
    private static final long PAGE_SIZE = 100;
    /** Upper bound on the number of result pages requested per category. */
    private static final long MAX_PAGES = 100;
    /** Items whose final price is at or above this value are counted. */
    private static final double PRICE_THRESHOLD = 80.0;
    /** Pause inserted before analysing each category that was found. */
    private static final long CATEGORY_PAUSE_MILLIS = 20000;

    /** First-level category names extracted by the most recent {@link #process(Page)} call. */
    private List<String> date;

    private final Site site = Site.me().setCycleRetryTimes(5).setRetryTimes(5).setSleepTime(500).setTimeOut(3 * 60 * 1000)
            .setUserAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0")
            .addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
            .addHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
            .setCharset("UTF-8");

    /**
     * Extracts the first-level category names from the page, stores them for later use by
     * {@link #main(String[])} and prints each name followed by the total count.
     *
     * @param page the downloaded page to extract category names from
     */
    @Override
    public void process(Page page) {
        date = page.getHtml().xpath("//li[@class='J_Cat a-all']/span/a/text()").all();
        for (String str : date) {
            System.out.println("淘宝页面一级类目 =>" + str);
        }
        System.out.println("类目总个数为=>" + date.size());
    }

    /**
     * @return the crawler configuration (retries, sleep time, timeout, headers, charset)
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Runs the crawl against the Taobao home page and then prints, for every category found,
     * the percentage of sampled items priced at or above the threshold.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // NOTE(review): TLSv1 is disabled by default in current JDK releases; confirm this
        // override is still needed and still works against the target host.
        System.setProperty("https.protocols", "TLSv1");
        DecimalFormat df = new DecimalFormat("0.000");
        TaoBaoProcessor taoBaoProcessor = new TaoBaoProcessor();
        Spider.create(taoBaoProcessor).addUrl("https://www.taobao.com/").thread(5).run();

        List<String> categoryNames = taoBaoProcessor.date;
        if (categoryNames == null) {
            // process() was never invoked (e.g. the page could not be downloaded).
            System.out.println("类目总个数为=>0");
            return;
        }

        for (String str : categoryNames) {
            List<TbkItemCouponGetResponse.TbkCoupon> resultList = APIUtils.itemCat(str).getResults();
            if (resultList == null || resultList.isEmpty()) {
                System.out.println("淘宝页面一级类目 =>" + str + " ,该类别ID未找到");
                continue;
            }
            if (!pause(CATEGORY_PAUSE_MILLIS)) {
                // Interrupted: stop instead of issuing further API calls.
                return;
            }
            // The category id is taken from the first coupon returned for this name.
            reportCategory(str, String.valueOf(resultList.get(0).getCategory()), df);
        }
    }

    /**
     * Fetches the result pages of one category and prints the share of items priced at or
     * above {@code PRICE_THRESHOLD}, relative to the number of items actually inspected.
     */
    private static void reportCategory(String categoryName, String categoryId, DecimalFormat df) {
        System.out.println("淘宝页面一级类目 =>" + categoryName + "类别ID为 => " + categoryId);
        TbkItemGetResponse firstResponse = APIUtils.itemGetDemo(categoryId);
        Long reportedTotal = firstResponse.getTotalResults();
        long total = reportedTotal == null ? 0L : reportedTotal;
        // Ceiling division; assumes PAGE_SIZE items per page — TODO confirm against APIUtils.
        long pages = (total + PAGE_SIZE - 1) / PAGE_SIZE;
        System.out.println("total =>" + total + "        pages =>" + pages);

        // Never request pages beyond the last one, and never more than MAX_PAGES.
        long pagesToScan = Math.min(pages, MAX_PAGES);
        long okCount = 0L;
        long scannedCount = 0L;
        System.out.print("\t进度:");
        for (long i = 1; i <= pagesToScan; i++) {
            System.out.print(df.format(i * 100.0 / pagesToScan) + "%     " + i + "   ");
            List<NTbkItem> nTbkItems = APIUtils.itemGetDemo(categoryId, i).getResults();
            if (nTbkItems == null) {
                continue;
            }
            // Count the items of THIS page (not those of the first response).
            for (NTbkItem nTbkItem : nTbkItems) {
                double price = parsePrice(nTbkItem.getZkFinalPrice());
                if (Double.isNaN(price)) {
                    // Items without a usable price are left out of the statistic.
                    continue;
                }
                scannedCount++;
                if (price >= PRICE_THRESHOLD) {
                    okCount++;
                }
            }
        }
        double percentage = scannedCount == 0 ? 0.0 : okCount * 100.0 / scannedCount;
        System.out.println("\n淘宝页面一级类目  =>" + categoryName + "   大部分商品在80以上的百分比为 => "
                + df.format(percentage) + "%");
    }

    /** Parses a price string; returns {@code NaN} when it is null or not a number. */
    private static double parsePrice(String price) {
        if (price == null) {
            return Double.NaN;
        }
        try {
            return Double.parseDouble(price);
        } catch (NumberFormatException ignored) {
            // A malformed price must not abort the whole run.
            return Double.NaN;
        }
    }

    /** Sleeps for the given time; returns {@code false} (interrupt flag restored) if interrupted. */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }
}
